#setwd("Research/BlueRed/Analysis/Replication")

library(MASS)
library(foreign)
library(arm)
source("superplot function.R")
load(file="cps.income.Rdata")
results=list()
years=seq(1968,2004,4)

state.region[8]="Northeast" # put Delaware into the Northeast
state.region[20]="Northeast" # put Maryland into the Northeast


#R
#2000 annenberg with region
annen2000.data <- read.dta("annen061229.dta",convert.factors=F)
annen2000.data <- annen2000.data[is.na(annen2000.data$income)==FALSE&is.na(annen2000.data$fips)==FALSE,]
state.income.data<-read.dta("avgincome_orig.dta",convert.factors=T)
income.new <- annen2000.data$income-3
y <- annen2000.data$rep_presvote
state.new <- annen2000.data$fips 
region <- annen2000.data$region
#region <- state.region[annen2000.data$state]
data<-cbind(annen2000.data, income.new, y, state.new)
n.state <-length(unique(state.new))
state.income<-as.list(rep(NA,9))
i <- min(state.income.data$st_year)
  while(i <= 2000){
state.income[[((i-min(state.income.data$st_year))/4)+1]] <- state.income.data[state.income.data$st_year==i,]
i <- i + 4
}
avg.income<-log(state.income[[9]]$st_inc10k)
n <- length(y) 
#income.new:  individual-level income
#state.new:  the state index variable
#avg.income:  state-level income (a vector of length 50)
avg.income.expanded <- avg.income[state.new]

# varying intercept 
fit.vi.2000 <- lmer (y ~ income.new + factor(region) + avg.income.expanded +
                     (1 | state.new), family=binomial(link="logit"))
display(fit.vi.2000)
coef(fit.vi.2000)

# varying intercept, varying slopes
fit.2000 <- lmer (y ~ income.new*factor(region) + income.new*avg.income.expanded +
                  (1 + income.new | state.new), family=binomial(link="logit"))
display (fit.2000)
# then, the coefs can be accessed using 
coef(fit.2000)

# region.indic<-read.dta("region_dummies_84.dta",convert.factors=F)
region.indic<-read.dta("region_indic_annen2000.dta",convert.factors=F)


#2004 annenberg with region
annen2004.data <- read.dta("annen2004_processed.dta",convert.factors=F)
annen2004.data <- annen2004.data[is.na(annen2004.data$state)==FALSE,]
state.income.data<-read.dta("avgincome_orig.dta",convert.factors=T)
income.new <- annen2004.data$income-3
y <- annen2004.data$votechoice
state.new <- annen2004.data$state 
region <- annen2004.data$region
#region <- state.region[annen2004.data$state]
data2004<-cbind(annen2004.data, income.new, y, state.new)
n.state <-length(unique(state.new))
state.income<-as.list(rep(NA,9))
i <- min(state.income.data$st_year)
  while(i <= 2004){
state.income[[((i-min(state.income.data$st_year))/4)+1]] <- state.income.data[state.income.data$st_year==i,]
i <- i + 4
}
avg.income<-log(state.income[[9]]$st_inc10k)
n <- length(y) 
#income.new:  individual-level income
#state.new:  the state index variable
#avg.income:  state-level income (a vector of length 50)
avg.income.expanded <- avg.income[state.new]

fit.vi.2004 <- lmer (y ~ income.new + factor(region) + avg.income.expanded + (1 | state.new), family=binomial(link="logit"))
display(fit.vi.2004)
coef(fit.vi.2004)

fit.2004 <- lmer (y ~ income.new*factor(region) + income.new*avg.income.expanded + 
(1 + income.new | state.new), family=binomial(link="logit"))
display (fit.2004)
#then, the coefs can be accessed using 
coef(fit.2004)
#region.indic <- read.dta("region_dummies_84.dta",convert.factors=F)
region.indic<-read.dta("region_indic_annen2000.dta",convert.factors=F)

### Plots ###

# Figure 3
## plot varying intercepts ##
# 2000
sl2000 = as.integer(rownames(coef(fit.vi.2000)[[1]]))
income.vi.slope.2000 <- coef(fit.vi.2000)$state.new[,2]  
income.vi.intercept.2000 <- coef(fit.vi.2000)$state.new[,1] + 
 region.indic[,1]*coef(fit.vi.2000)$state.new[,3] +
 region.indic[,2]*coef(fit.vi.2000)$state.new[,4] +
 region.indic[,3]*coef(fit.vi.2000)$state.new[,5] +
 coef(fit.vi.2000)$state.new[,6]*avg.income[sl2000]
results[[9]]=cbind(income.vi.intercept.2000, income.vi.slope.2000); rownames(results[[9]])=sl2000

# 2004
sl2004 = as.integer(rownames(coef(fit.vi.2004)[[1]]))
income.vi.slope.2004 <- coef(fit.vi.2004)$state.new[,2]  
income.vi.intercept.2004 <- coef(fit.vi.2004)$state.new[,1] + 
 region.indic[,1]*coef(fit.vi.2004)$state.new[,3] +
 region.indic[,2]*coef(fit.vi.2004)$state.new[,4] +
 region.indic[,3]*coef(fit.vi.2004)$state.new[,5] +
 coef(fit.vi.2004)$state.new[,6]*avg.income[sl2004]
results[[10]]=cbind(income.vi.intercept.2004, income.vi.slope.2004); rownames(results[[10]])=sl2004

windows(width=9.5,height=4)
superp=superplot(results, start=9, end=10, rows=1, columns=2, indiv=1, lmer=1, var.slope=1, lowbound=-2, hibound=2, sf=3)

savePlot("annen2000-2004_vi.superplot", type=c("pdf"))
savePlot("annen2000-2004_vi.superplot", type=c("ps"))
savePlot("figure3", type=c("eps"))

# Figure 4
## plot varying intercept, varying slopes ##
# 2000
sl2000 = as.integer(rownames(coef(fit.2000)[[1]]))
##
income.slope.2000 <- coef(fit.2000)$state.new[,2] + (coef(fit.2000)$state.new[,10])*(avg.income[sl2000] )+
 region.indic[,1]*coef(fit.2000)$state.new[,7] +
 region.indic[,2]*coef(fit.2000)$state.new[,8] +
 region.indic[,3]*coef(fit.2000)$state.new[,9] 
income.intercept.2000 <- coef(fit.2000)$state.new[,1] + (coef(fit.2000)$state.new[,6])*( avg.income [sl2000])+
 region.indic[,1]*coef(fit.2000)$state.new[,3] +
 region.indic[,2]*coef(fit.2000)$state.new[,4] +
 region.indic[,3]*coef(fit.2000)$state.new[,5] 
results[[9]]=cbind(income.intercept.2000,income.slope.2000); rownames(results[[9]])=sl2000

# 2004
sl2004 = as.integer(rownames(coef(fit.2004)[[1]]))
income.slope.2004 <- coef(fit.2004)$state.new[,2] + (coef(fit.2004)$state.new[,10])*(avg.income[sl2004] )+
 region.indic[,1]*coef(fit.2004)$state.new[,7] +
 region.indic[,2]*coef(fit.2004)$state.new[,8] +
 region.indic[,3]*coef(fit.2004)$state.new[,9] 
income.intercept.2004 <- coef(fit.2004)$state.new[,1] + (coef(fit.2004)$state.new[,6])*( avg.income [sl2004])+
 region.indic[,1]*coef(fit.2004)$state.new[,3] +
 region.indic[,2]*coef(fit.2004)$state.new[,4] +
 region.indic[,3]*coef(fit.2004)$state.new[,5] 
results[[10]]=cbind(income.intercept.2004,income.slope.2004); rownames(results[[10]])=sl2004

windows(width=9.5,height=4)
superp=superplot(results, start=9, end=10, rows=1, columns=2, indiv=1, lmer=1, var.slope=1, lowbound=-2, hibound=2, sf=3)

savePlot("annen2000-2004_superplot",type=c("pdf"))
savePlot("annen2000-2004_superplot",type=c("ps"))
savePlot("figure4",type=c("eps"))

# Figure 5
avg.income.2004<-(state.income[[9]]$st_income)
avg.income.2000<-(state.income[[9]]$st_income)

windows(width=9.5,height=4)
par(mfrow=c(1,2))

plot(c(19000,40000),c(0,.5), main=2000, cex.lab=1.4, cex.main=1.7, type="n", ylab="Slopes", xlab="Median income within state", cex.axis=1.5, yaxt="n", xaxt="n", mgp=c(2.5,.5,0))
axis(side=2,seq(0,.4,.2),cex.axis=1.4)
axis(side=1,c(20000,30000,40000),labels=c("$20,000","$30,000","$40,000"),cex.axis=1.4)
text((avg.income.2000[sl2000]),income.slope.2000, label=state.abb[sl2000],cex=.9)
#lines(lowess((avg.income.2000[sl2000]),income.slope.2000))
abline (0,0,col="gray")

plot(c(19000,40000),c(0,.5), main=2004, cex.lab=1.4, cex.main=1.75, type="n", ylab="Slopes", xlab="Median income within state", cex.axis=1.5, yaxt="n", xaxt="n", mgp=c(2.5,.5,0))
axis(side=2,seq(0,.4,.2),cex.axis=1.4)
axis(side=1,c(20000,30000,40000),labels=c("$20,000","$30,000","$40,000"),cex.axis=1.4)
text((avg.income.2004[sl2004]),income.slope.2004, label=state.abb[sl2004],cex=.9)
#lines(lowess((avg.income.2004[sl2004]),income.slope.2004))
abline (0,0,col="gray")

savePlot("annen2000-2004_slopes_income",type=c("pdf"))
savePlot("annen2000-2004_slopes_income",type=c("ps"))
savePlot("figure5",type=c("eps"))
